# Replication File for "Combining List Experiment and Direct Question Estimates of Sensitive Behavior Prevalence"
# Peter M. Aronow, Alexander Coppock, Forrest W. Crawford, Donald P. Green
# Source Code

# V = List response
# Z = Treatment Assignment
# Y = Direct Question Response

direct_est <- function(Y){
  est <- mean(Y)
  return(est)
}

direct_var_est <- function(Y){
  var_est <- var(Y)/length(Y)
  return(var_est)
}

conv_est <- function(V,Z) {
  est <- mean(V[Z==1]) - mean(V[Z==0])
  return(est)
}

conv_var_est<- function(V, Z){
  var_est <- (var(V[Z==1])/sum(Z) + var(V[Z==0])/sum(1-Z))
  return(var_est)
}

combined_est <- function(V,Z,Y) {
  est <- mean(Y) + mean(1-Y)*(mean(V[Z==1 & Y == 0]) - mean(V[Z==0 & Y == 0]))
  return(est)
}

combined_var_est <- function(V,Z,Y){
  mu_hat <- combined_est(V = V, Z = Z, Y = Y)
  Y_bar <- direct_est(Y = Y)
  var_Z_1 <- var(V[Z==1 & Y==0])
  var_Z_0 <- var(V[Z==0 & Y==0])
  gamma_hat <- mean(Z)
  var_est <- ((((1-mu_hat)^2)/(1-Y_bar))*Y_bar + 
    (1-Y_bar)*(var_Z_1/gamma_hat + var_Z_0/(1-gamma_hat)))/length(V)
  return(var_est)
}


improve_var <- function(V, Z, Y) {
  return((1 - combined_var_est(V=V,Z = Z,Y = Y) /conv_var_est(V = V, Z = Z))*100)
}

whole_shebang <- function(V,Z,Y){
  return(c(direct_est(Y = Y), direct_var_est(Y = Y)^0.5,
         conv_est(V = V, Z = Z), conv_var_est(V = V, Z = Z)^0.5,
         combined_est(V = V, Z = Z,Y = Y), combined_var_est(V = V, Z = Z,Y = Y)^0.5,
         improve_var(V = V, Z = Z,Y = Y)))
}


## Placebo Tests

placeboI_test <- function(V,Z,Y){
  treated_admitters <- V[Y==1 & Z == 1]
  control_admitters <- V[Y==1 & Z == 0]
  est <- mean(treated_admitters) - mean(control_admitters)
  var <- (var(treated_admitters))/length(treated_admitters) + (var(control_admitters))/length(control_admitters)
  se <- sqrt(var)
  p_value <- 2*pnorm(- abs(est -1)/ se )
  n <- length(treated_admitters ) + length(control_admitters)
  return(c(est=est, se=se, p_value=p_value, n=n))
}

placeboII_test <- function(Z,Y){
  delta_hat <- sum(Y*Z)/sum(Z) - sum(Y*(1-Z))/sum(1-Z)
  var_Z_1 <- var(Y[Z==1])
  var_Z_0 <- var(Y[Z==0])
  var_delta_hat <- var_Z_1/sum(Z) + var_Z_0/sum(1-Z) 
  se_delta_hat <- sqrt(var_delta_hat)
  p_value <- 2*pnorm(- abs(delta_hat)/se_delta_hat)
  n = length(Y)
  return(c(est=delta_hat, se=se_delta_hat, p_value=p_value, n=n))
}

design_affector <- function(x){
  x_star <- x
  x_star[x!=4] <- x[x!=4] + 1
  return(x_star)
}

list_simulator <-function(N_trueadmitter,N_falseconfessor, N_withholder, N_innocent, pr_listliar=0, pr_designaffected=0, baseline_binomial_prob=.5){
  N <- N_trueadmitter + N_withholder + N_falseconfessor + N_innocent
  N_engager <- N_trueadmitter + N_withholder
  N_listliar <- floor(N_engager*pr_listliar)
  N_designaffected <- floor(N*pr_designaffected)
  
  ### No Stochasticity in types (e.g. not drawn from distributions)
  Ystar <- c(rep(1, N_engager), rep(0, N-N_engager))
  withholder <- c(ifelse(1:N_engager %in% sample(1:N_engager, N_withholder), 1, 0),rep(0,N-N_engager))
  falseconfessor <- c(rep(0, N_engager),ifelse(1:(N-N_engager) %in% sample(1:(N-N_engager), N_falseconfessor), 1, 0))
  listliar <- c(ifelse(1:N_engager %in% sample(1:N_engager, N_listliar), 1, 0),rep(0,N-N_engager))
  designaffected <- ifelse(1:N %in% sample(1:N, N_designaffected), 1, 0)
  
  Y <- Ystar * (1-withholder) + (1-Ystar) * falseconfessor
  
  V0 <- rbinom(N, size=4, prob=baseline_binomial_prob)
  V1 <- V0+ Ystar*(1-listliar)
  V1[designaffected==1] <- design_affector(V0[designaffected==1]) +
    Ystar[designaffected==1]*(1-listliar[designaffected==1])  
  
  # List Experiment:
  m <- N/2 # Number treated
  Z <- ifelse(1:N %in% sample(1:N, m),1,0)
  V <- V1*Z + (1-Z)*V0
  reject_null <- as.numeric(placeboI_test(V=V, Z=Z, Y=Y)$p_value<0.05)
  return(list(direct_est=direct_est(Y = Y), 
              conv_est=conv_est(V = V,Z = Z),
              new_est=combined_est(V = V, Z = Z, Y = Y),
              reject_null=reject_null))
}


